#!/usr/bin/env python3
"""Convert paper.md to paper.docx — SSRN-ready with tables, figures, references."""

import re
from pathlib import Path
from docx import Document
from docx.shared import Inches, Pt, Cm, RGBColor, Emu
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.oxml.ns import qn, nsdecls
from docx.oxml import parse_xml

# Version stamp: rendered on the title page and echoed in the console summary
# printed by convert_paper().
VERSION = '20260224_r2'


# ---------------------------------------------------------------------------
# BibTeX parser
# ---------------------------------------------------------------------------

def parse_bib(bib_path):
    """Parse a .bib file into a dict keyed by citation key.

    Args:
        bib_path: Object exposing ``read_text(encoding=...)`` (for example a
            ``pathlib.Path``) that returns the BibTeX source.

    Returns:
        dict mapping each citation key to a dict of lower-cased field names
        and cleaned string values. Every entry also carries ``'_type'``, the
        lower-cased entry type (``'article'``, ``'techreport'``, ...).

    Limitations (unchanged from the regex-based design): an entry is only
    recognised when its closing brace sits on a line of its own, citation
    keys must consist of word characters, and braced values may nest at most
    one level deep.
    """
    entry_re = re.compile(r'@(\w+)\{(\w+),\s*(.*?)\n\}', re.DOTALL)
    field_re = re.compile(
        r'(\w+)\s*=\s*(?:'
        r'\{((?:[^{}]|\{[^{}]*\})*)\}'  # {braced value}, one nesting level
        r'|"([^"]*)"'                   # "quoted value"
        r'|(\w+)'                       # bare value, e.g. year = 2020
        r')'
    )
    # Applied after braces are stripped, so {\"o}, \"{o} and \"o all match.
    latex_replacements = (
        (r'\"o', '\u00f6'), (r'\"a', '\u00e4'), (r'\"u', '\u00fc'),
        (r'\"O', '\u00d6'), (r'\"A', '\u00c4'), (r'\"U', '\u00dc'),
        (r'\&', '&'),
    )

    entries = {}
    text = bib_path.read_text(encoding='utf-8')
    for m in entry_re.finditer(text):
        entry_type, key, body = m.group(1), m.group(2), m.group(3)
        fields = {}
        for fm in field_re.finditer(body):
            fname = fm.group(1).lower()
            # Exactly one of the three value alternatives participated.
            fval = next(g for g in fm.group(2, 3, 4) if g is not None)
            fval = fval.replace('{', '').replace('}', '')
            for latex, plain in latex_replacements:
                fval = fval.replace(latex, plain)
            fval = re.sub(r'\s+', ' ', fval).strip()
            fields[fname] = fval
        # BibTeX entry types are case-insensitive; normalise so that
        # '@Article' is handled like '@article' downstream.
        fields['_type'] = entry_type.lower()
        entries[key] = fields
    return entries


def format_bib_entry(key, entry):
    """Format a single bibliography entry as a one-line reference string.

    Args:
        key: Citation key of the entry. Accepted for interface symmetry with
            the caller; it does not influence the output.
        entry: dict of BibTeX fields (lower-cased names) plus ``'_type'``.

    Returns:
        ``'Authors (Year). "Title." <venue details>.'`` where the venue part
        depends on the entry type: journal/volume(number): pages for
        articles, institution and report type/number for tech reports,
        book title/editor/pages for ``incollection``, and the ``note`` field
        for ``misc``. Other types get only the author/year/title prefix.
    """
    authors = entry.get('author', 'Unknown')
    # 'n.d.' instead of an empty '()' when the year is missing.
    year = entry.get('year', '') or 'n.d.'
    title = entry.get('title', '')
    journal = entry.get('journal', '')
    volume = entry.get('volume', '')
    number = entry.get('number', '')
    # BibTeX writes page ranges as '12--34'; render them with an en dash.
    pages = re.sub(r'-{2,}', '\u2013', entry.get('pages', ''))
    institution = entry.get('institution', '')
    booktitle = entry.get('booktitle', '')
    note = entry.get('note', '')
    # Entry types are case-insensitive in BibTeX.
    etype = entry.get('_type', 'article').lower()

    parts = [f'{authors} ({year}).']
    if title:
        # Do not stack a period on a title that already ends a sentence.
        closer = '' if title[-1] in '.?!' else '.'
        parts.append(f' "{title}{closer}"')

    if etype == 'article' and journal:
        ref = f' {journal}'
        if volume:
            ref += f' {volume}'
            if number:
                ref += f'({number})'
        if pages:
            ref += f': {pages}'
        parts.append(ref + '.')
    elif etype == 'techreport' and institution:
        report_type = entry.get('type', 'Working Paper')
        ref = f' {institution}, {report_type}'
        if number:
            ref += f' {number}'
        parts.append(ref + '.')
    elif etype == 'incollection' and booktitle:
        editor = entry.get('editor', '')
        ref = f' In: {booktitle}'
        if editor:
            ref += f', ed. {editor}'
        if pages:
            ref += f', pp. {pages}'
        parts.append(ref + '.')
    elif etype == 'misc' and note:
        # Notes often already end with a period; avoid '..'.
        parts.append(f' {note.rstrip(".")}.')

    return ''.join(parts)


def _citation_surname(name):
    """Return the surname from 'Last, First' or 'First Last' notation."""
    name = name.strip()
    if ',' in name:
        return name.split(',')[0].strip()
    tokens = name.split()
    return tokens[-1] if tokens else name


def make_inline_citation(key, bib):
    """Convert a citation key to 'Author (Year)' style text.

    Args:
        key: Citation key (without the leading ``@``).
        bib: dict of parsed bibliography entries keyed by citation key.

    Returns:
        ``'Surname (Year)'`` for one author, ``'A and B (Year)'`` for two,
        and ``'A et al. (Year)'`` for three or more authors or when the
        author list ends with BibTeX's ``and others``. A missing year is
        shown as ``n.d.``. The key itself is returned unchanged when it is
        not present in ``bib`` (or its entry is empty).
    """
    entry = bib.get(key)
    if not entry:
        return key
    year = entry.get('year', '') or 'n.d.'
    names = [n.strip() for n in entry.get('author', key).split(' and ')]

    # 'and others' is BibTeX's explicit marker for a truncated author list.
    truncated = len(names) > 1 and names[-1].lower() == 'others'
    if truncated:
        names = names[:-1]

    first = _citation_surname(names[0])
    if truncated or len(names) > 2:
        return f'{first} et al. ({year})'
    if len(names) == 2:
        return f'{first} and {_citation_surname(names[1])} ({year})'
    return f'{first} ({year})'


# ---------------------------------------------------------------------------
# YAML front matter
# ---------------------------------------------------------------------------

def parse_yaml_front_matter(text):
    """Extract a small set of keys from a leading YAML front-matter block.

    This is a regex-based reader, not a YAML parser. It recognises the
    single-line scalars ``title``, ``author``, ``date``, ``keywords``,
    ``jel`` and ``bibliography`` (optionally quoted) and an ``abstract``
    given either as a ``|``/``>`` block or as a single-line scalar.

    Args:
        text: Full markdown source.

    Returns:
        ``(meta, body)`` where ``meta`` holds the non-empty keys that were
        found and ``body`` is the text after the closing ``---`` (or
        ``...``) line. When there is no front matter, ``({}, text)``.
    """
    m = re.match(r'^---[ \t]*\n(.*?)\n(?:---|\.\.\.)[ \t]*(?:\n|$)',
                 text, re.DOTALL)
    if not m:
        return {}, text
    yaml_block = m.group(1)

    def scalar(key):
        # [ \t]* rather than \s* so an empty value cannot swallow the
        # newline and capture the following line instead.
        km = re.search(rf'^{key}:[ \t]*(.*?)[ \t]*$', yaml_block,
                       re.MULTILINE)
        if not km:
            return ''
        value = km.group(1)
        if len(value) >= 2 and value[0] == value[-1] == "'":
            value = value[1:-1]
        return value.strip('"')

    meta = {}
    for key in ('title', 'author', 'date', 'keywords', 'jel',
                'bibliography'):
        value = scalar(key)
        if value:
            meta[key] = value

    # Block-style abstract: everything up to the next top-level 'key:' line.
    am = re.search(
        r'(?:^|\n)abstract:[ \t]*[|>][+-]?[ \t]*\n(.*?)(?=\n\w+:|$)',
        yaml_block, re.DOTALL)
    if am:
        abstract = re.sub(r'\n\s{2,}', '\n', am.group(1)).strip()
    else:
        abstract = scalar('abstract')
        if abstract[:1] in ('|', '>'):
            abstract = ''  # block indicator with no usable content
    if abstract:
        meta['abstract'] = abstract

    return meta, text[m.end():]


# ---------------------------------------------------------------------------
# Formatting helpers
# ---------------------------------------------------------------------------

def add_formatted_run(paragraph, text, bold=False, italic=False, size=None,
                      color=None, superscript=False):
    """Append ``text`` to ``paragraph`` as a new run and style it.

    ``bold`` and ``italic`` are always written to the run (including an
    explicit ``False``). ``size`` (points), ``color`` (an RGB triple) and
    ``superscript`` are applied only when truthy. Returns the new run.
    """
    new_run = paragraph.add_run(text)
    new_run.bold, new_run.italic = bold, italic
    if size:
        new_run.font.size = Pt(size)
    if color:
        red, green, blue = color
        new_run.font.color.rgb = RGBColor(red, green, blue)
    if superscript:
        new_run.font.superscript = True
    return new_run


_SUBSCRIPTS = {
    '0': '\u2080', '1': '\u2081', '2': '\u2082', '3': '\u2083',
    '4': '\u2084', '5': '\u2085', '6': '\u2086', '7': '\u2087',
    '8': '\u2088', '9': '\u2089', 'i': '\u1d62', 'j': '\u2c7c',
    'k': '\u2096', 't': '\u209c', 'n': '\u2099',
}

_SUPERSCRIPTS = {
    '0': '\u2070', '1': '\u00b9', '2': '\u00b2', '3': '\u00b3',
    '4': '\u2074', '5': '\u2075', '6': '\u2076', '7': '\u2077',
    '8': '\u2078', '9': '\u2079',
}

_LATEX_SYMBOLS = [
    (r'\alpha', '\u03b1'), (r'\beta', '\u03b2'), (r'\gamma', '\u03b3'),
    (r'\delta', '\u03b4'), (r'\theta', '\u03b8'), (r'\phi', '\u03c6'),
    (r'\rho', '\u03c1'), (r'\Delta', '\u0394'), (r'\sum', '\u2211'),
    (r'\times', '\u00d7'), (r'\cdot', '\u00b7'), (r'\neq', '\u2260'),
    (r'\approx', '\u2248'), (r'\leq', '\u2264'), (r'\geq', '\u2265'),
    (r'\to', '\u2192'), (r'\rightarrow', '\u2192'), (r'\in', '\u2208'),
    (r'\ldots', '\u2026'), (r'\pm', '\u00b1'), (r'\infty', '\u221e'),
    (r'\sigma', '\u03c3'), (r'\lambda', '\u03bb'), (r'\mu', '\u03bc'),
    (r'\epsilon', '\u03b5'), (r'\pi', '\u03c0'),
]


# Inline markup recognised by add_rich_text, compiled once. When two patterns
# match at the same position the one listed first wins, so the more specific
# forms (*** before ** before *, $$ before $) must stay ahead of the general
# ones.
_INLINE_PATTERNS = tuple(
    (re.compile(pattern), kind) for pattern, kind in (
        (r'\*\*\*(.*?)\*\*\*', 'bold_italic'),
        (r'\*\*(.*?)\*\*', 'bold'),
        (r'\*(.*?)\*', 'italic'),
        (r'\$\$(.*?)\$\$', 'display_math'),
        (r'\$(.*?)\$', 'math'),
        (r'\[([^\]]*@[^\]]*)\]', 'bracket_cite'),
        (r'@(\w+\d{4}\w?)', 'citation'),
        (r'---', 'emdash'),
    )
)


def _latex_to_unicode(math_text):
    """Approximate a LaTeX math fragment with plain Unicode text."""
    # \text{...} must be unwrapped while its braces still exist.
    math_text = re.sub(r'\\text\{([^{}]*)\}', r'\1', math_text)

    # Replace known commands in a single pass. Longest-first alternation plus
    # the "not followed by a letter" guard keeps \in from eating the start of
    # \infty (or of commands that are not in the table, such as \int).
    symbols = dict(_LATEX_SYMBOLS)
    alternation = '|'.join(
        re.escape(cmd) for cmd in sorted(symbols, key=len, reverse=True))
    math_text = re.sub(rf'(?:{alternation})(?![A-Za-z])',
                       lambda m: symbols[m.group(0)], math_text)

    def grouped(mapping):
        # Convert a whole braced script (x_{it}) only when every character
        # has a Unicode form; otherwise leave it for the fallback below.
        def repl(m):
            chars = m.group(1)
            if chars and all(ch in mapping for ch in chars):
                return ''.join(mapping[ch] for ch in chars)
            return m.group(0)
        return repl

    math_text = re.sub(r'_\{([^{}]*)\}', grouped(_SUBSCRIPTS), math_text)
    math_text = re.sub(r'\^\{([^{}]*)\}', grouped(_SUPERSCRIPTS), math_text)

    # \frac{a}{b} -> a/b, then drop any remaining grouping braces.
    math_text = math_text.replace(r'\frac{', '')
    math_text = math_text.replace('}{', '/')
    math_text = math_text.replace('{', '').replace('}', '')

    # Single-character scripts; unmapped characters keep their marker.
    math_text = re.sub(
        r'_(\w)',
        lambda m: _SUBSCRIPTS.get(m.group(1), '_' + m.group(1)),
        math_text)
    math_text = re.sub(
        r'\^(\w)',
        lambda m: _SUPERSCRIPTS.get(m.group(1), '^' + m.group(1)),
        math_text)
    return math_text


def add_rich_text(paragraph, text, base_size=11, base_italic=False, bib=None):
    """Parse inline markdown and append the result to ``paragraph`` as runs.

    Handles ``***bold italic***``, ``**bold**``, ``*italic*``, ``$math$`` /
    ``$$math$$`` (converted to Unicode and set in italics), ``@key``
    citations, bracketed citation groups ``[...@key...]`` (rendered inside
    parentheses) and ``---`` (em dash). The scan repeatedly takes the
    leftmost match; markup is not nested.

    Args:
        paragraph: Target paragraph; runs are added via add_formatted_run.
        text: Inline markdown source.
        base_size: Font size in points for every run.
        base_italic: Italic flag for plain-text runs only; emphasis, math and
            citation runs set their own bold/italic flags.
        bib: Parsed bibliography used to expand citation keys. When empty or
            None, keys are emitted as-is.
    """
    remaining = text

    while remaining:
        match = None
        kind = None
        for regex, candidate_kind in _INLINE_PATTERNS:
            found = regex.search(remaining)
            # Strict '<' keeps the earlier-listed pattern on ties.
            if found and (match is None or found.start() < match.start()):
                match, kind = found, candidate_kind

        if match is None:
            add_formatted_run(paragraph, remaining, size=base_size,
                              italic=base_italic)
            break

        if match.start() > 0:
            add_formatted_run(paragraph, remaining[:match.start()],
                              size=base_size, italic=base_italic)

        if kind == 'bold_italic':
            add_formatted_run(paragraph, match.group(1),
                              bold=True, italic=True, size=base_size)
        elif kind == 'bold':
            add_formatted_run(paragraph, match.group(1),
                              bold=True, size=base_size)
        elif kind == 'italic':
            add_formatted_run(paragraph, match.group(1),
                              italic=True, size=base_size)
        elif kind in ('math', 'display_math'):
            add_formatted_run(paragraph, _latex_to_unicode(match.group(1)),
                              italic=True, size=base_size)
        elif kind == 'citation':
            cite_key = match.group(1)
            cite_text = make_inline_citation(cite_key, bib) if bib else cite_key
            add_formatted_run(paragraph, cite_text, size=base_size)
        elif kind == 'bracket_cite':
            content = match.group(1)
            if bib:
                # Expand every @key inside the brackets.
                rendered = re.sub(
                    r'@(\w+\d{4}\w?)',
                    lambda m: make_inline_citation(m.group(1), bib),
                    content
                )
            else:
                rendered = re.sub(r'@(\w+)', r'\1', content)
            add_formatted_run(paragraph, f'({rendered.strip("; ")})',
                              size=base_size)
        elif kind == 'emdash':
            add_formatted_run(paragraph, '\u2014', size=base_size)

        remaining = remaining[match.end():]


# ---------------------------------------------------------------------------
# Tables
# ---------------------------------------------------------------------------

def parse_markdown_table(lines):
    """Parse pipe-table lines into a header row and data rows.

    Args:
        lines: Table lines in source order. The first is the header; the
            second is normally the delimiter row (``|---|:-:|``).

    Returns:
        ``(header, rows)`` as lists of stripped cell strings, or
        ``(None, None)`` when fewer than two lines are supplied. Blank lines
        among the data rows are ignored. The second line is dropped only if
        it really is a delimiter row, so a table written without one keeps
        its first data row.
    """
    if len(lines) < 2:
        return None, None

    def parse_row(line):
        cells = [c.strip() for c in line.split('|')]
        # Leading/trailing pipes produce empty edge cells; drop only those.
        if cells and cells[0] == '':
            cells = cells[1:]
        if cells and cells[-1] == '':
            cells = cells[:-1]
        return cells

    def is_delimiter(line):
        # Only pipes, dashes, alignment colons, '+' joints and whitespace.
        return '-' in line and re.fullmatch(r'[\s|:+-]+', line) is not None

    first_data = 2 if is_delimiter(lines[1]) else 1
    header = parse_row(lines[0])
    rows = [parse_row(l) for l in lines[first_data:] if l.strip()]
    return header, rows


def add_table(doc, header, rows, title=None, notes=None, bib=None):
    """Add a formatted table (optional title above, notes below) to ``doc``.

    Args:
        doc: Target document.
        header: List of header cell strings; its length fixes the number of
            columns.
        rows: List of data rows (lists of cell strings). Cells beyond the
            header width are ignored; short rows leave trailing cells empty.
        title: Optional bold title paragraph placed above the table.
        notes: Optional note text placed below the table in small italics.
        bib: Parsed bibliography forwarded to add_rich_text for citations.

    Returns:
        The created table object.
    """
    def shade(cell, fill):
        # Cell background is set by appending a raw <w:shd> element.
        shading = parse_xml(f'<w:shd {nsdecls("w")} w:fill="{fill}"/>')
        cell._tc.get_or_add_tcPr().append(shading)

    if title:
        p = doc.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
        # Spacing lives on paragraph_format; assigning p.space_before would
        # only create an unused Python attribute.
        p.paragraph_format.space_before = Pt(12)
        add_formatted_run(p, title, bold=True, size=11)

    ncols = len(header)
    nrows = len(rows) + 1
    table = doc.add_table(rows=nrows, cols=ncols)
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    table.style = 'Table Grid'
    table.autofit = True

    # Header row: centred, bold, shaded.
    for j, cell_text in enumerate(header):
        cell = table.rows[0].cells[j]
        cell.text = ''
        p = cell.paragraphs[0]
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        add_rich_text(p, cell_text, base_size=9, bib=bib)
        for run in p.runs:
            run.bold = True
            run.font.size = Pt(9)
        shade(cell, 'D9E2F3')

    # Data rows: first column left-aligned, the rest centred; every second
    # data row gets a light grey band.
    for i, row_data in enumerate(rows):
        for j, cell_text in enumerate(row_data):
            if j >= ncols:
                continue
            cell = table.rows[i + 1].cells[j]
            cell.text = ''
            p = cell.paragraphs[0]
            p.alignment = (WD_ALIGN_PARAGRAPH.LEFT if j == 0
                           else WD_ALIGN_PARAGRAPH.CENTER)
            add_rich_text(p, cell_text, base_size=9, bib=bib)
            if i % 2 == 1:
                shade(cell, 'F2F2F2')

    # Safety net: any run left without an explicit size gets the table size.
    for row in table.rows:
        for cell in row.cells:
            for p in cell.paragraphs:
                for run in p.runs:
                    if run.font.size is None:
                        run.font.size = Pt(9)

    if notes:
        p = doc.add_paragraph()
        p.paragraph_format.space_before = Pt(2)
        p.paragraph_format.space_after = Pt(12)
        add_rich_text(p, notes, base_size=8, base_italic=True, bib=bib)

    return table


def read_external_table(md_path, paper_dir):
    """Read an external markdown table file.

    Args:
        md_path: Path of the table file relative to ``paper_dir``.
        paper_dir: Base directory; joined with ``md_path`` using ``/``.

    Returns:
        ``(table_lines, notes)``. ``table_lines`` holds every line that
        starts with ``|`` (unmodified). ``notes`` is the text of the last
        ``*Note...`` paragraph with surrounding asterisks removed, its
        wrapped continuation lines joined with spaces, or ``None`` when the
        file has no note. A missing file yields ``([], None)``.
    """
    fpath = paper_dir / md_path
    if not fpath.exists():
        return [], None

    table_lines = []
    note_parts = []
    in_note = False
    for line in fpath.read_text(encoding='utf-8').strip().split('\n'):
        stripped = line.strip()
        if stripped.startswith('|'):
            table_lines.append(line)
            in_note = False
        elif re.match(r'^\*Note', stripped):
            # A later note replaces an earlier one.
            note_parts = [stripped.strip('*').strip()]
            in_note = True
        elif in_note and stripped:
            # Wrapped note text continues until a blank line or table line.
            note_parts.append(stripped.strip('*').strip())
        else:
            in_note = False

    notes = ' '.join(part for part in note_parts if part) or None
    return table_lines, notes


# ---------------------------------------------------------------------------
# Figures
# ---------------------------------------------------------------------------

def add_figure(doc, img_path, caption, fig_num, paper_dir, width_inches=5.5):
    """Add a centred figure followed by a 'Figure N: caption' line.

    Args:
        doc: Target document.
        img_path: Image path relative to ``paper_dir``.
        caption: Caption text, inserted verbatim in italics (inline markdown
            is not interpreted).
        fig_num: Figure number used in the caption and in the placeholder.
        paper_dir: Base directory the image path is resolved against.
        width_inches: Rendered picture width in inches.

    When the image file does not exist, a red italic placeholder paragraph is
    added instead and no caption is written.
    """
    full_path = paper_dir / img_path
    if not full_path.exists():
        p = doc.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        add_formatted_run(p, f'[Figure {fig_num}: {img_path} not found]',
                          italic=True, size=10,
                          color=(180, 0, 0))
        return

    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    # Spacing must be set through paragraph_format; a bare p.space_before
    # assignment has no effect on the document.
    p.paragraph_format.space_before = Pt(12)
    run = p.add_run()
    run.add_picture(str(full_path), width=Inches(width_inches))

    # Caption
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p.paragraph_format.space_after = Pt(12)
    add_formatted_run(p, f'Figure {fig_num}: ', bold=True, size=10)
    add_formatted_run(p, caption, italic=True, size=10)


# ---------------------------------------------------------------------------
# Page numbers
# ---------------------------------------------------------------------------

def add_page_numbers(doc):
    """Insert a centred PAGE field into the footer of every section.

    The field is assembled from three consecutive runs (field begin, the
    ``PAGE`` instruction, field end). All three are 9 pt; the first two are
    additionally set to Times New Roman.
    """
    # (raw XML for the run's child element, whether to set the font name)
    field_pieces = (
        (f'<w:fldChar {nsdecls("w")} w:fldCharType="begin"/>', True),
        (f'<w:instrText {nsdecls("w")} xml:space="preserve"> PAGE </w:instrText>',
         True),
        (f'<w:fldChar {nsdecls("w")} w:fldCharType="end"/>', False),
    )

    for section in doc.sections:
        footer = section.footer
        footer.is_linked_to_previous = False
        if footer.paragraphs:
            target = footer.paragraphs[0]
        else:
            target = footer.add_paragraph()
        target.alignment = WD_ALIGN_PARAGRAPH.CENTER

        for xml, with_font_name in field_pieces:
            piece = target.add_run()
            piece.font.size = Pt(9)
            if with_font_name:
                piece.font.name = 'Times New Roman'
            piece._r.append(parse_xml(xml))


# ---------------------------------------------------------------------------
# Collect note text following a table
# ---------------------------------------------------------------------------

def collect_notes(lines, i):
    """Gather a ``*Note:`` / ``*Notes:`` paragraph beginning at or after ``i``.

    Blank lines at ``i`` are skipped first. If the next non-blank line opens
    a note, it and its continuation lines are joined with single spaces
    (surrounding asterisks stripped from each line). Continuation ends at a
    blank line or at a line starting with ``#``, ``:``, ``**Table``, ``|``
    or ``![``.

    Returns ``(notes, next_index)``; ``notes`` is ``None`` when no note
    starts there, in which case ``next_index`` points at the first non-blank
    line (or the end of ``lines``).
    """
    total = len(lines)
    pos = i
    while pos < total and lines[pos].strip() == '':
        pos += 1
    if pos >= total:
        return None, pos

    opener = lines[pos].strip()
    if re.match(r'^\*Notes?:', opener) is None:
        return None, pos

    stop_prefixes = ('#', ':', '**Table', '|', '![')
    pieces = [opener.strip('*').strip()]
    pos += 1
    while pos < total:
        candidate = lines[pos].strip()
        if not candidate or candidate.startswith(stop_prefixes):
            break
        pieces.append(candidate.strip('*').strip())
        pos += 1
    return ' '.join(pieces), pos


# ---------------------------------------------------------------------------
# Main converter
# ---------------------------------------------------------------------------

def convert_paper(paper_dir=None, md_name='paper.md',
                  docx_name='multilateral_140_country.docx'):
    """Convert the markdown paper into a formatted .docx file.

    Reads ``md_name`` from ``paper_dir``, renders the YAML front matter as a
    title page (title, author, date, version, abstract, keywords, JEL),
    then walks the body line by line and emits headings, display equations,
    figures, tables (inline, captioned, or included from ``tables/*.md``),
    lists and paragraphs. A heading named "References" is replaced by the
    formatted entries of every cited key found in the bibliography. Page
    numbers are added to the footer and the result is saved as ``docx_name``
    in ``paper_dir``; a short summary is printed.

    Args:
        paper_dir: Directory holding the paper, its ``figures/``, ``tables/``
            and bibliography. Defaults to the directory of this script.
        md_name: Markdown source file name.
        docx_name: Output file name.
    """
    paper_dir = Path(__file__).parent if paper_dir is None else Path(paper_dir)
    md_path = paper_dir / md_name
    docx_path = paper_dir / docx_name
    heading_sizes = {1: 16, 2: 13, 3: 11}

    text = md_path.read_text(encoding='utf-8')

    # Parse front matter and bibliography
    meta, body = parse_yaml_front_matter(text)
    bib_path = paper_dir / meta.get('bibliography', 'references.bib')
    bib = parse_bib(bib_path) if bib_path.exists() else {}

    doc = Document()

    # --- Page setup ---
    for section in doc.sections:
        section.top_margin = Cm(2.54)
        section.bottom_margin = Cm(2.54)
        section.left_margin = Cm(2.54)
        section.right_margin = Cm(2.54)

    # --- Default font ---
    style = doc.styles['Normal']
    font = style.font
    font.name = 'Times New Roman'
    font.size = Pt(11)
    style.paragraph_format.space_after = Pt(6)
    style.paragraph_format.line_spacing = 1.15

    # Heading styles: level -> (space before, space after) in points.
    heading_spacing = {1: (24, 12), 2: (18, 8), 3: (12, 6)}
    for lvl in range(1, 4):
        hs = doc.styles[f'Heading {lvl}']
        hs.font.name = 'Times New Roman'
        hs.font.color.rgb = RGBColor(0, 0, 0)
        hs.font.size = Pt(heading_sizes[lvl])
        before, after = heading_spacing[lvl]
        hs.paragraph_format.space_before = Pt(before)
        hs.paragraph_format.space_after = Pt(after)

    # --- Title page ---
    # NOTE: paragraph spacing is set via paragraph_format throughout; the
    # Paragraph object itself has no space_before/space_after properties.
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p.paragraph_format.space_after = Pt(6)
    add_formatted_run(p, meta.get('title', 'Untitled'), bold=True, size=18)

    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p.paragraph_format.space_after = Pt(4)
    add_formatted_run(p, meta.get('author', ''), size=12)

    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p.paragraph_format.space_after = Pt(4)
    add_formatted_run(p, meta.get('date', ''), size=12)

    # Version line
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p.paragraph_format.space_after = Pt(18)
    add_formatted_run(p, f'Version {VERSION}', size=10,
                      color=(120, 120, 120))

    # Abstract
    # NOTE(review): the abstract text may still contain newlines from the
    # YAML block; confirm whether those should become spaces instead of
    # line breaks in the output.
    if 'abstract' in meta:
        p = doc.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
        p.paragraph_format.space_after = Pt(6)
        add_formatted_run(p, 'Abstract', bold=True, size=11)

        p = doc.add_paragraph()
        p.paragraph_format.left_indent = Cm(1.27)
        p.paragraph_format.right_indent = Cm(1.27)
        p.paragraph_format.space_after = Pt(12)
        add_rich_text(p, meta['abstract'], base_size=10, bib=bib)

    # Keywords and JEL
    if 'keywords' in meta:
        p = doc.add_paragraph()
        p.paragraph_format.space_after = Pt(2)
        add_formatted_run(p, 'Keywords: ', bold=True, size=10)
        add_formatted_run(p, meta['keywords'], size=10)
    if 'jel' in meta:
        p = doc.add_paragraph()
        p.paragraph_format.space_after = Pt(18)
        add_formatted_run(p, 'JEL Classification: ', bold=True, size=10)
        add_formatted_run(p, meta['jel'], size=10)

    # Page break after title page
    doc.add_page_break()

    # --- Process body ---
    lines = body.split('\n')
    i = 0
    table_counter = 0
    figure_counter = 0

    # Collect all cited keys up front for the references section
    cited_keys = {ck.group(1) for ck in re.finditer(r'@(\w+\d{4}\w?)', body)}

    while i < len(lines):
        line = lines[i]

        # Skip empty lines
        if not line.strip():
            i += 1
            continue

        # Skip pandoc div markers
        if line.strip().startswith(':::'):
            i += 1
            continue

        # \newpage -> page break
        if line.strip() == r'\newpage':
            doc.add_page_break()
            i += 1
            continue

        # Headings
        heading_match = re.match(r'^(#{1,3})\s+(.*)', line)
        if heading_match:
            level = len(heading_match.group(1))
            heading_text = heading_match.group(2).strip()

            # References section — render bibliography
            if heading_text.lower() == 'references':
                h = doc.add_heading(level=level)
                add_rich_text(h, heading_text,
                              base_size=heading_sizes[level],
                              bib=bib)
                # Entries sorted by author string, then year; keys missing
                # from the bibliography sort last and are skipped.
                sorted_keys = sorted(
                    cited_keys,
                    key=lambda k: (bib.get(k, {}).get('author', 'ZZZ'),
                                   bib.get(k, {}).get('year', '9999'))
                )
                for bk in sorted_keys:
                    if bk in bib:
                        entry_text = format_bib_entry(bk, bib[bk])
                        p = doc.add_paragraph()
                        # Hanging indent
                        p.paragraph_format.left_indent = Cm(1.27)
                        p.paragraph_format.first_line_indent = Cm(-1.27)
                        p.paragraph_format.space_after = Pt(4)
                        add_rich_text(p, entry_text, base_size=10, bib=bib)
                # Skip the section's own content until next heading or end
                i += 1
                while i < len(lines):
                    if re.match(r'^#{1,3}\s', lines[i]):
                        break
                    i += 1
                continue

            h = doc.add_heading(level=level)
            add_rich_text(h, heading_text,
                          base_size=heading_sizes[level], bib=bib)
            i += 1
            continue

        # Display equations ($$...$$)
        if line.strip().startswith('$$'):
            eq_lines = [line]
            if not line.strip().endswith('$$') or line.strip() == '$$':
                i += 1
                while i < len(lines):
                    eq_lines.append(lines[i])
                    if lines[i].strip().endswith('$$'):
                        break
                    i += 1
            eq_text = ' '.join(eq_lines)
            eq_text = eq_text.replace('$$', '').strip()

            p = doc.add_paragraph()
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            p.paragraph_format.space_before = Pt(6)
            p.paragraph_format.space_after = Pt(6)
            if eq_text:
                # Re-wrap in $$ so add_rich_text routes the whole equation
                # through its math conversion instead of treating it as
                # ordinary inline markdown.
                add_rich_text(p, f'$${eq_text}$$', base_size=11,
                              base_italic=True, bib=bib)
            i += 1
            continue

        # Figure: ![caption](figures/path.png){#fig:label}
        fig_match = re.match(
            r'^!\[(.*?)\]\((figures/[^)]+)\)(?:\{[^}]*\})?\s*$', line
        )
        if fig_match:
            figure_counter += 1
            caption = fig_match.group(1)
            img_path = fig_match.group(2)
            add_figure(doc, img_path, caption, figure_counter, paper_dir)
            i += 1
            continue

        # External table include: ![](tables/file.md)
        ext_table_match = re.match(
            r'^!\[\]\((tables/[^)]+\.md)\)\s*$', line
        )
        if ext_table_match:
            table_counter += 1
            rel_path = ext_table_match.group(1)
            table_lines, ext_notes = read_external_table(rel_path, paper_dir)
            if table_lines:
                header, rows = parse_markdown_table(table_lines)
                if header and rows:
                    # No caption given: derive a title from the file name.
                    add_table(doc, header, rows,
                              title=f'Table {table_counter}: '
                                    f'{Path(rel_path).stem.replace("_", " ").title()}',
                              notes=ext_notes, bib=bib)
            i += 1
            continue

        # Pandoc-style table caption: ": Title {#tbl:label}"
        caption_match = re.match(
            r'^:\s+(.*?)(?:\s*\{#tbl:\w+\})?\s*$', line
        )
        if caption_match:
            table_counter += 1
            table_title = (f"Table {table_counter}: "
                           f"{caption_match.group(1).strip()}")
            i += 1
            while i < len(lines) and not lines[i].strip():
                i += 1

            # Check for external table include: ![](tables/file.md)
            ext_match = (re.match(r'^!\[\]\((tables/[^)]+\.md)\)\s*$',
                                  lines[i])
                         if i < len(lines) else None)
            if ext_match:
                rel_path = ext_match.group(1)
                ext_lines, ext_notes = read_external_table(
                    rel_path, paper_dir)
                if ext_lines:
                    header, rows = parse_markdown_table(ext_lines)
                    if header and rows:
                        add_table(doc, header, rows, title=table_title,
                                  notes=ext_notes, bib=bib)
                i += 1
                continue

            # Otherwise collect inline table lines
            table_lines = []
            while i < len(lines) and '|' in lines[i]:
                table_lines.append(lines[i])
                i += 1
            notes, i = collect_notes(lines, i)

            if table_lines:
                header, rows = parse_markdown_table(table_lines)
                if header and rows:
                    add_table(doc, header, rows, title=table_title,
                              notes=notes, bib=bib)
            continue

        # Table title (bold line starting with **Table)
        if line.strip().startswith('**Table'):
            table_title = line.strip().strip('*')
            i += 1
            while i < len(lines) and not lines[i].strip():
                i += 1
            table_lines = []
            while i < len(lines) and '|' in lines[i]:
                table_lines.append(lines[i])
                i += 1
            notes, i = collect_notes(lines, i)

            if table_lines:
                header, rows = parse_markdown_table(table_lines)
                if header and rows:
                    add_table(doc, header, rows, title=table_title,
                              notes=notes, bib=bib)
            continue

        # Inline table (starts with |) without preceding title
        if line.strip().startswith('|'):
            table_lines = []
            while i < len(lines) and '|' in lines[i]:
                table_lines.append(lines[i])
                i += 1
            if table_lines:
                header, rows = parse_markdown_table(table_lines)
                if header and rows:
                    add_table(doc, header, rows, bib=bib)
            continue

        # Numbered lists
        num_match = re.match(r'^(\d+)\.\s+(.*)', line)
        if num_match:
            item_text = num_match.group(2)
            p = doc.add_paragraph(style='List Number')
            add_rich_text(p, item_text, base_size=11, bib=bib)
            i += 1
            continue

        # Bullet points
        bullet_match = re.match(r'^(\s*)-\s+(.*)', line)
        if bullet_match:
            bullet_text = bullet_match.group(2)
            p = doc.add_paragraph(style='List Bullet')
            add_rich_text(p, bullet_text, base_size=11, bib=bib)
            i += 1
            continue

        # Regular paragraph (may span multiple lines); it ends at a blank
        # line or at any line that starts another construct handled above.
        para_lines = [line]
        i += 1
        while i < len(lines):
            next_line = lines[i]
            if not next_line.strip():
                break
            if re.match(r'^#{1,3}\s', next_line):
                break
            if next_line.strip().startswith('|'):
                break
            if next_line.strip().startswith('**Table'):
                break
            if re.match(r'^:\s+', next_line):
                break
            if next_line.strip().startswith('$$'):
                break
            if re.match(r'^\s*-\s+', next_line):
                break
            if re.match(r'^\d+\.\s+', next_line):
                break
            if re.match(r'^!\[', next_line):
                break
            if next_line.strip() == r'\newpage':
                break
            if next_line.strip().startswith(':::'):
                break
            para_lines.append(next_line)
            i += 1

        para_text = ' '.join(l.strip() for l in para_lines)
        if para_text.strip():
            p = doc.add_paragraph()
            add_rich_text(p, para_text, base_size=11, bib=bib)

    # --- Page numbers ---
    add_page_numbers(doc)

    # Save
    doc.save(str(docx_path))
    n_tables = len(doc.tables)
    n_figs = figure_counter
    n_refs = len([k for k in cited_keys if k in bib])
    print(f"Saved: {docx_path}")
    print(f"Version: {VERSION}")
    print(f"Tables: {n_tables} | Figures: {n_figs} | References: {n_refs}")


# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    convert_paper()
